import fitz  # PyMuPDF
import io
import os
from PIL import Image
import sys
import cv2
import numpy as np
sys.path.append('/home/FAST_DATA_MIRROR/Langchain-Chatchat-master')
from pdf_tools.duguang import Duguang

class ReadImInPdf:
    """Render every page of a PDF and run each page image through ``Duguang``.

    For each page the result of ``Duguang.infer`` is written as
    ``<page index>.png`` into an output directory.
    """

    # Directory the page images are written to unless the caller overrides it.
    DEFAULT_OUTPUT_DIR = "/home/FAST_DATA_MIRROR/Langchain-Chatchat-master/pdf_tools/output/text"

    def __init__(self) -> None:
        # Project-local helper; only its ``infer`` method is used by this class.
        self.duguang = Duguang()

    def pdf_image_extract(self, path, output_dir=None):
        """Process every page of the PDF at *path* and save the results as PNGs.

        Args:
            path: Location of the PDF file, passed straight to ``fitz.open``.
            output_dir: Directory that receives ``0.png``, ``1.png``, ...
                (zero-based page index). Defaults to ``DEFAULT_OUTPUT_DIR``.
                The directory is created if it does not exist yet.

        Returns:
            None. The only effect is the image files written to *output_dir*.
        """
        if output_dir is None:
            output_dir = self.DEFAULT_OUTPUT_DIR

        # The context manager closes the document even if a page fails,
        # so the file handle is not leaked.
        with fitz.open(path) as doc:
            # Created only after the PDF opened successfully, so a bad input
            # path does not leave an empty directory behind.
            os.makedirs(output_dir, exist_ok=True)

            for i in range(len(doc)):
                # Render the PDF page to a raster image.
                pix = doc.get_page_pixmap(i)

                # The reshape assumes 3 samples per pixel, i.e. the pixmap
                # produced by the default arguments (RGB, no alpha).
                img = np.frombuffer(pix.samples, np.uint8).reshape(pix.h, pix.w, 3)

                # Swaps the first and third channel. The rendered samples are
                # in RGB order, so the array handed to ``infer`` is BGR.
                # NOTE(review): presumably Duguang expects OpenCV-style BGR
                # input -- confirm against pdf_tools.duguang.
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                image_cv2 = self.duguang.infer(img)

                # NOTE(review): PIL treats a 3-channel array as RGB; if
                # ``infer`` returns BGR data the saved colours are swapped --
                # verify what ``infer`` returns.
                Image.fromarray(image_cv2).save(os.path.join(output_dir, f"{i}.png"))
if __name__ == '__main__':
    # Script entry point: process one fixed PDF with a fresh ReadImInPdf.
    source_pdf = '/home/FAST_DATA_MIRROR/Langchain-Chatchat-master/云鹰平台服务系统接口协议V1.8.4.pdf'
    ReadImInPdf().pdf_image_extract(source_pdf)